module net.BurtonRadons.parse.encoding;

private import std.string;

/** A character encoding; invalid characters return 0xFFFD.
  *
  * The static members below are the shared instance of each supported
  * encoding.  Concrete subclasses supply next, prev, length and put; this
  * base class builds name lookup, byte-order-mark detection and
  * conversion on top of them.
  */
class Encoding
{
    static
    {
        Encoding ascii; /**< Normal 7-bit ASCII. */
        Encoding utf8; /**< UTF-8 using ISO-10646 range. */
        Encoding utf16; /**< UTF-16 using machine endianness. */
        Encoding utf16le; /**< UTF-16 little-endian. */
        Encoding utf16be; /**< UTF-16 big-endian. */
        Encoding utf32; /**< UTF-32 using machine endianness. */
        Encoding utf32be; /**< UTF-32 big-endian. */
        Encoding utf32le; /**< UTF-32 little-endian. */
        Encoding ucs2; /**< Two-octet BMP using machine endianness. */
        Encoding ucs2be; /**< Two-octet BMP using big-endian. */
        Encoding ucs2le; /**< Two-octet BMP using little-endian. */
    }

    /** Attempt to find an encoding with this name, returns null if not found.
      * The name is lower-cased before comparison, so the lookup is
      * case-insensitive.
      */
    static Encoding find (char [] name)
    {
        name = std.string.tolower (name);
        switch (name)
        {
            case "ansi_x3.4-1968":
            case "us-ascii":
            case "us":
            case "ascii":
            case "iso-ir-6":
            case "ansi_x3.4-1986":
            case "iso_646.irv:1991":
            case "iso646-us":
            case "ibm367":
            case "cp367":
                return ascii;

            case "utf8": return utf8;
            case "utf-8": return utf8;

            case "utf16": return utf16;
            case "utf-16": return utf16;
            case "utf16le": return utf16le;
            case "utf-16le": return utf16le;
            case "utf16be": return utf16be;
            case "utf-16be": return utf16be;

            case "utf32": return utf32;
            case "utf-32": return utf32;
            case "utf32be": return utf32be;
            case "utf-32be": return utf32be;
            case "utf32-be": return utf32be; /* Historical spelling, kept for compatibility. */
            case "utf32le": return utf32le;
            case "utf-32le": return utf32le;
            case "utf32-le": return utf32le; /* Historical spelling, kept for compatibility. */

            case "ucs4": return utf32;
            case "ucs-4": return utf32;
            case "ucs4be": return utf32be;
            case "ucs-4be": return utf32be;
            case "ucs4le": return utf32le;
            case "ucs-4le": return utf32le;
            case "iso-10646-ucs-4": return utf32;

            case "ucs2": return ucs2;
            case "ucs-2": return ucs2;
            case "iso-10646-ucs-2": return ucs2;
            case "ucs2be": return ucs2be;
            case "ucs-2be": return ucs2be;
            case "ucs2le": return ucs2le;
            case "ucs-2le": return ucs2le;
            default: return null;
        }
    }

    /** Check whether there's a BOM at the start of the string,
      * return the encoding you should use, and skip the pointer
      * over it.  If no BOM is found, utf8 is returned and the
      * pointer is left alone.  count is the number of bytes in the
      * string; four is needed to handle all BOM.
      */
    static Encoding kaboom (inout ubyte *pointer, int count)
    {
        /* The four-octet marks are tried first: the UTF-32LE mark
         * (FF FE 00 00) starts with the UTF-16LE mark (FF FE), so testing
         * the two-octet marks first would make UTF-32LE undetectable.
         */
        if (count >= 4)
        {
            if (pointer [0] == 0x00 && pointer [1] == 0x00 && pointer [2] == 0xFE && pointer [3] == 0xFF)
            {
                pointer += 4;
                return utf32be;
            }

            if (pointer [0] == 0xFF && pointer [1] == 0xFE && pointer [2] == 0x00 && pointer [3] == 0x00)
            {
                pointer += 4;
                return utf32le;
            }
        }

        if (count >= 3 && pointer [0] == 0xEF && pointer [1] == 0xBB && pointer [2] == 0xBF)
        {
            pointer += 3;
            return utf8;
        }

        if (count >= 2 && pointer [0] == 0xFE && pointer [1] == 0xFF)
        {
            pointer += 2;
            return utf16be;
        }

        if (count >= 2 && pointer [0] == 0xFF && pointer [1] == 0xFE)
        {
            pointer += 2;
            return utf16le;
        }

        return utf8;
    }

    /** Check whether there's a BOM at the start of the string; the
      * array is re-sliced so that it begins after any BOM found.
      */
    static Encoding kaboom (inout ubyte [] string)
    {
        ubyte *pointer = string;
        Encoding encoding = kaboom (pointer, string.length);

        /* The distance the pointer moved is the number of BOM octets to drop. */
        string = string [(int) (pointer - (ubyte *) string) .. string.length];
        return encoding;
    }

    /** Read the next character, increment the pointer. */
    abstract uint next (inout ubyte *pointer, ubyte *end);

    /** Read the previous character, decrement the pointer. */
    abstract uint prev (inout ubyte *pointer, ubyte *begin);

    /** Look at the next character without advancing. */
    uint peekNext (ubyte *pointer, ubyte *end)
    {
        /* pointer is a by-value copy here, so next only moves the copy. */
        return next (pointer, end);
    }

    /** Return the number of bytes this encoding would take, zero if it's unencodable. */
    abstract int length (uint ch);

    /** Encode in the array and increment the pointer. */
    abstract void put (uint ch, inout ubyte *pointer);

    /** Return whether this character can be encoded in the destination. */
    bit valid (uint ch) { return length (ch) != 0; }

    /** Convert a block of data into a string, filtering out-of-range values. */
    char [] toascii (ubyte [] data)
    {
        return (char []) convert (data, ascii);
    }

    /** Convert text from this encoding to another, filtering out-of-range values and 0xFFFD.
      * When dest is this very object the input array itself is returned;
      * otherwise a new array is allocated.
      */
    ubyte [] convert (ubyte [] data, Encoding dest)
    {
        if (this === dest)
            return data;

        ubyte *p = data, e = p + data.length, o;
        ubyte [] r;
        uint ch, l;

        /* First pass: add up the size of the output. */
        while (p < e)
        {
            ch = next (p, e);

            if (ch == 0xFFFD || !dest.valid (ch))
                continue;
            l += dest.length (ch);
        }

        r = new ubyte [l];
        p = data;
        o = r;

        /* Second pass: encode exactly the characters counted above. */
        while (p < e)
        {
            ch = next (p, e);

            if (ch == 0xFFFD || !dest.valid (ch))
                continue;
            dest.put (ch, o);
        }

        return r;
    }
}

/** Good old 7-bit encoding.  Octets above 0x7F are reported as 0xFFFD. */
class Encoding_ASCII : Encoding
{
    static this ()
    {
        ascii = new Encoding_ASCII ();
    }

    /** Map anything outside the 7-bit range to 0xFFFD. */
    final uint bound (uint ch)
    {
        if (ch > 0x7F)
            return 0xFFFD;
        return ch;
    }

    /** Read one octet and advance.  end is not consulted, so the caller
      * must make sure pointer is before the end of the data.
      */
    override uint next (inout ubyte *pointer, ubyte *end)
    {
        return bound (*pointer ++);
    }

    /** Step back one octet and read it.  begin is not consulted, so the
      * caller must make sure pointer is after the start of the data.
      */
    override uint prev (inout ubyte *pointer, ubyte *begin)
    {
        return bound (*-- pointer);
    }

    /** One octet for 7-bit values, zero (unencodable) for everything else. */
    override int length (uint ch)
    {
        if (ch < 0x80)
            return 1;
        return 0;
    }

    /** Store a 7-bit value and advance; other values are ignored. */
    override void put (uint ch, inout ubyte *pointer)
    {
        if (ch < 0x80)
            *pointer ++ = ch;
    }

    /** Convert ASCII data to another encoding.
      *
      * Seven-bit octets mean the same thing in UTF-8, so for that
      * destination the data is returned as it is when it is clean.
      * Octets above 0x7F are not ASCII and would form invalid UTF-8, so
      * they are filtered out into a new array, as the base class does
      * for out-of-range values.
      */
    override ubyte [] convert (ubyte [] data, Encoding dest)
    {
        if (dest === utf8)
        {
            int kept;

            for (int c; c < data.length; c ++)
                if (data [c] < 0x80)
                    kept ++;

            /* Nothing to filter: hand back the input without copying. */
            if (kept == data.length)
                return data;

            ubyte [] r = new ubyte [kept];
            int o;

            for (int c; c < data.length; c ++)
                if (data [c] < 0x80)
                    r [o ++] = data [c];

            return r;
        }

        return super.convert (data, dest);
    }
}

/** UTF-8 Encoding, using ISO-10646 format.
  *
  * The trailing table recognises the five- and six-octet lead bytes,
  * but legal only accepts sequences of one to four octets, so longer
  * forms decode as 0xFFFD.  length and put are not implemented; this
  * class can be decoded from but not encoded to.
  */
class Encoding_UTF8 : Encoding
{
    static this ()
    {
        utf8 = new Encoding_UTF8 ();
    }

    /** Convert UTF-8 data to another encoding.  When the destination is
      * ASCII and no octet has its high bit set, the data is already
      * ASCII and is returned without copying.
      */
    override ubyte [] convert (ubyte [] data, Encoding dest)
    {
        if (dest == ascii)
        {
            for (int c; c < data.length; c ++)
                if (data [c] & 128)
                    return super.convert (data, dest);

            return data;
        }

        return super.convert (data, dest);
    }

/+
#ifndef DOXYGEN_SHOULD_SKIP_THIS
+/

    /* The number of bytes that should follow this value. */
    const byte [256] trailing =
    [
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
        2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
    ];

    /*
     * Magic values subtracted from a buffer value during UTF8 conversion.
     * This table contains as many values as there might be trailing bytes
     * in a UTF-8 sequence.
     */
    const uint [6] offsets = 
    [ 
        0x00000000, 
        0x00003080, 
        0x000E2080, 
        0x03C82080, 
        0xFA082080, 
        0x82082080
    ];

/+
#endif
+/

    /** Return whether the length octets at source form an acceptable
      * sequence.  Only lengths one to four are accepted.  The trailing
      * octets are checked from last to first, and the cases fall through
      * on purpose.
      */
    final bit legal (ubyte *source, int length)
    {
        ubyte a;
        ubyte *srcptr = source + length;

        switch (length)
        {
            default: return false;
                /* Everything else falls through when "true"... */
            case 4: if ((a = (*-- srcptr)) < 0x80 || a > 0xBF) return false;
            case 3: if ((a = (*-- srcptr)) < 0x80 || a > 0xBF) return false;
            case 2: if ((a = (*-- srcptr)) > 0xBF) return false;
                /* The second octet has a tighter range after some lead octets. */
                switch (*source)
                {
                    /* no fall-through in this inner switch */
                    case 0xE0: if (a < 0xA0) return false; break;
                    case 0xF0: if (a < 0x90) return false; break;
                    case 0xF4: if (a > 0x8F) return false; break;
                    default:  if (a < 0x80) return false;
                }
            case 1: if (*source >= 0x80 && *source < 0xC2) return false;
                    if (*source > 0xF4) return false;
        }
        return true;
    }

    /** Decode the character at pointer and advance past it.
      *
      * A sequence cut short by end returns 0xFFFD and moves the pointer
      * to end; an unacceptable sequence returns 0xFFFD and skips one
      * octet.  The caller must make sure pointer is before end.
      */
    override uint next (inout ubyte *pointer, ubyte *end)
    {
        int total = trailing [*pointer];
        uint ch;

        /* The lead octet plus its trailing octets must all lie before end. */
        if (total + 1 > end - pointer)
        {
            pointer = end;
            return 0xFFFD;
        }

        if (!legal (pointer, total + 1))
        {
            pointer ++;
            return 0xFFFD;
        }

        /* Accumulate the raw octets; legal has restricted total to 0..3. */
        switch (total)
        {
            case 3: ch += *pointer ++; ch <<= 6;
            case 2: ch += *pointer ++; ch <<= 6;
            case 1: ch += *pointer ++; ch <<= 6;
            case 0: ch += *pointer ++;
        }

        /* Strip the marker bits that were summed in with the payload. */
        return ch - offsets [total];
    }

    /** Decode the character that ends just before pointer and leave the
      * pointer on its first octet.  If pointer is already at begin, the
      * pointer stays there and 0xFFFD is returned.
      */
    override uint prev (inout ubyte *pointer, ubyte *begin)
    {
        ubyte *orig = pointer;

        /* Nothing precedes the start of the data. */
        if (pointer <= begin)
        {
            pointer = begin;
            return 0xFFFD;
        }

        pointer --;
        if (*pointer < 128)
            return *pointer;

        /* Walk back over continuation octets (10xxxxxx), never past begin. */
        while (pointer > begin && (*pointer & 128) == 128 && (*pointer & 64) == 0)
            pointer --;

        /* Decode forwards from the lead octet, bounded by where we started. */
        ubyte *r = pointer;
        uint ch = next (pointer, orig);
        pointer = r;
        return ch;
    }

    /** Not implemented; always throws. */
    override int length (uint ch)
    {
        throw new Error ("Encoding_UTF8.length not implemented");
        return 0;
    }

    /** Not implemented; always throws. */
    override void put (uint ch, inout ubyte *pointer)
    {
        throw new Error ("Encoding_UTF8.put not implemented");
    }

    unittest
    {
        /* TODO: Only lightly tests handling of noncompliance. */
        static ubyte [] d1 = [0x66, 0x67, 0x68, 0xED, 0x95, 0x9C, 0xEA, 0xB5, 0xAD, 0xEC, 0x96, 0xB4];
        static ubyte [] d2 = [0x66, 0xED, 0x95];
        ubyte *p = d1, b = p, e = p + d1.length;

        with (new Encoding_UTF8 ())
        {
            assert (next (p, e) == 0x66);
            assert (next (p, e) == 0x67);
            assert (next (p, e) == 0x68);
            assert (next (p, e) == 0xD55C);
            assert (next (p, e) == 0xAD6D);
            assert (next (p, e) == 0xC5B4);
            assert (p == e);
            assert (prev (p, b) == 0xC5B4);
            assert (prev (p, b) == 0xAD6D);
            assert (prev (p, b) == 0xD55C);
            assert (prev (p, b) == 0x68);
            assert (prev (p, b) == 0x67);
            assert (prev (p, b) == 0x66);
            assert (p == b);

            /* Stepping back from the start is refused without moving. */
            assert (prev (p, b) == 0xFFFD);
            assert (p == b);

            /* A sequence cut short by the end of the data is not read past. */
            p = d2;
            e = p + d2.length;
            assert (next (p, e) == 0x66);
            assert (next (p, e) == 0xFFFD);
            assert (p == e);
        }
    }
}

/** UTF-16 in the byte order of the host machine.  Surrogate pairs are
  * combined; unpaired surrogates come back as 0xFFFD.  Encoding (length
  * and put) is not implemented.
  */
class Encoding_UTF16 : Encoding
{
    const uint surrogateHighStart = 0xD800;
    const uint surrogateHighEnd = 0xDBFF;
    const uint surrogateLowStart = 0xDC00;
    const uint surrogateLowEnd = 0xDFFF;
    const uint halfShift = 10;
    const uint halfBase = 0x0010000;
    const uint halfMask = 0x3FF;

    static this ()
    {
        utf16 = new Encoding_UTF16 ();
    }

    /** Not implemented; always throws. */
    override int length (uint ch)
    {
        throw new Error ("Encoding_UTF16.length not implemented");
        return 0;
    }

    /** Not implemented; always throws. */
    override void put (uint ch, inout ubyte *pointer)
    {
        throw new Error ("Encoding_UTF16.put not implemented");
    }

    /** Decode the character at pointer and advance past it.  Running out
      * of data moves the pointer to end and returns 0xFFFD.
      */
    override uint next (inout ubyte *pointer, ubyte *end)
    {
        uint lead, trail;

        if (end - pointer < 2)
        {
            pointer = end;
            return 0xFFFD;
        }

        volatile lead = *(ushort *) pointer;
        pointer += 2;

        /* Outside the surrogate block: the unit is the character. */
        if (lead < surrogateHighStart || lead > surrogateLowEnd)
            return lead;

        /* A low surrogate cannot start a pair. */
        if (lead >= surrogateLowStart)
            return 0xFFFD;

        /* lead is a high surrogate, so a second unit is required. */
        if (end - pointer < 2)
        {
            pointer = end;
            return 0xFFFD;
        }

        volatile trail = *(ushort *) pointer;
        pointer += 2;

        if (trail < surrogateLowStart || trail > surrogateLowEnd)
            return 0xFFFD;

        return halfBase + ((lead - surrogateHighStart) << halfShift) + (trail - surrogateLowStart);
    }

    /** Decode the character ending just before pointer and move the
      * pointer back onto it.  Running past begin sets the pointer to
      * begin and returns 0xFFFD.
      */
    override uint prev (inout ubyte *pointer, ubyte *begin)
    {
        uint trail, lead;

        pointer -= 2;
        if (pointer < begin)
        {
            pointer = begin;
            return 0xFFFD;
        }

        volatile trail = *(ushort *) pointer;

        /* Outside the surrogate block: the unit is the character. */
        if (trail < surrogateHighStart || trail > surrogateLowEnd)
            return trail;

        /* A high surrogate cannot end a pair. */
        if (trail <= surrogateHighEnd)
            return 0xFFFD;

        /* trail is a low surrogate, so the unit before it must be a high one. */
        pointer -= 2;
        if (pointer < begin)
        {
            pointer = begin;
            return 0xFFFD;
        }

        volatile lead = *(ushort *) pointer;

        if (lead < surrogateHighStart || lead > surrogateHighEnd)
            return 0xFFFD;

        return halfBase + ((lead - surrogateHighStart) << halfShift) + (trail - surrogateLowStart);
    }
}

/** UTF-16 stored most-significant octet first.  Surrogate pairs are
  * combined; unpaired surrogates come back as 0xFFFD.  Encoding (length
  * and put) is not implemented.
  */
class Encoding_UTF16BE : Encoding
{
    const uint surrogateHighStart = 0xD800;
    const uint surrogateHighEnd = 0xDBFF;
    const uint surrogateLowStart = 0xDC00;
    const uint surrogateLowEnd = 0xDFFF;
    const uint halfShift = 10;
    const uint halfBase = 0x0010000;
    const uint halfMask = 0x3FF;

    static this ()
    {
        utf16be = new Encoding_UTF16BE ();
    }

    /** Not implemented; always throws. */
    override int length (uint ch)
    {
        throw new Error ("Encoding_UTF16BE.length not implemented");
        return 0;
    }

    /** Not implemented; always throws. */
    override void put (uint ch, inout ubyte *pointer)
    {
        throw new Error ("Encoding_UTF16BE.put not implemented");
    }

    /** Decode the character at pointer and advance past it.  Running out
      * of data moves the pointer to end and returns 0xFFFD.
      */
    override uint next (inout ubyte *pointer, ubyte *end)
    {
        uint lead, trail;

        if (end - pointer < 2)
        {
            pointer = end;
            return 0xFFFD;
        }

        lead = pointer [1] | (pointer [0] << 8);
        pointer += 2;

        /* Outside the surrogate block: the unit is the character. */
        if (lead < surrogateHighStart || lead > surrogateLowEnd)
            return lead;

        /* A low surrogate cannot start a pair. */
        if (lead >= surrogateLowStart)
            return 0xFFFD;

        /* lead is a high surrogate, so a second unit is required. */
        if (end - pointer < 2)
        {
            pointer = end;
            return 0xFFFD;
        }

        trail = pointer [1] | (pointer [0] << 8);
        pointer += 2;

        if (trail < surrogateLowStart || trail > surrogateLowEnd)
            return 0xFFFD;

        return halfBase + ((lead - surrogateHighStart) << halfShift) + (trail - surrogateLowStart);
    }

    /** Decode the character ending just before pointer and move the
      * pointer back onto it.  Running past begin sets the pointer to
      * begin and returns 0xFFFD.
      */
    override uint prev (inout ubyte *pointer, ubyte *begin)
    {
        uint trail, lead;

        pointer -= 2;
        if (pointer < begin)
        {
            pointer = begin;
            return 0xFFFD;
        }

        trail = pointer [1] | (pointer [0] << 8);

        /* Outside the surrogate block: the unit is the character. */
        if (trail < surrogateHighStart || trail > surrogateLowEnd)
            return trail;

        /* A high surrogate cannot end a pair. */
        if (trail <= surrogateHighEnd)
            return 0xFFFD;

        /* trail is a low surrogate, so the unit before it must be a high one. */
        pointer -= 2;
        if (pointer < begin)
        {
            pointer = begin;
            return 0xFFFD;
        }

        lead = pointer [1] | (pointer [0] << 8);

        if (lead < surrogateHighStart || lead > surrogateHighEnd)
            return 0xFFFD;

        return halfBase + ((lead - surrogateHighStart) << halfShift) + (trail - surrogateLowStart);
    }
}

/** UTF-16 stored least-significant octet first.  Surrogate pairs are
  * combined; unpaired surrogates come back as 0xFFFD.  Encoding (length
  * and put) is not implemented.
  */
class Encoding_UTF16LE : Encoding
{
    const uint surrogateHighStart = 0xD800;
    const uint surrogateHighEnd = 0xDBFF;
    const uint surrogateLowStart = 0xDC00;
    const uint surrogateLowEnd = 0xDFFF;
    const uint halfShift = 10;
    const uint halfBase = 0x0010000;
    const uint halfMask = 0x3FF;

    static this ()
    {
        utf16le = new Encoding_UTF16LE ();
    }

    /** Not implemented; always throws. */
    override int length (uint ch)
    {
        throw new Error ("Encoding_UTF16LE.length not implemented");
        return 0;
    }

    /** Not implemented; always throws. */
    override void put (uint ch, inout ubyte *pointer)
    {
        throw new Error ("Encoding_UTF16LE.put not implemented");
    }

    /** Decode the character at pointer and advance past it.  Running out
      * of data moves the pointer to end and returns 0xFFFD.
      */
    override uint next (inout ubyte *pointer, ubyte *end)
    {
        uint lead, trail;

        if (end - pointer < 2)
        {
            pointer = end;
            return 0xFFFD;
        }

        lead = pointer [0] | (pointer [1] << 8);
        pointer += 2;

        /* Outside the surrogate block: the unit is the character. */
        if (lead < surrogateHighStart || lead > surrogateLowEnd)
            return lead;

        /* A low surrogate cannot start a pair. */
        if (lead >= surrogateLowStart)
            return 0xFFFD;

        /* lead is a high surrogate, so a second unit is required. */
        if (end - pointer < 2)
        {
            pointer = end;
            return 0xFFFD;
        }

        trail = pointer [0] | (pointer [1] << 8);
        pointer += 2;

        if (trail < surrogateLowStart || trail > surrogateLowEnd)
            return 0xFFFD;

        return halfBase + ((lead - surrogateHighStart) << halfShift) + (trail - surrogateLowStart);
    }

    /** Decode the character ending just before pointer and move the
      * pointer back onto it.  Running past begin sets the pointer to
      * begin and returns 0xFFFD.
      */
    override uint prev (inout ubyte *pointer, ubyte *begin)
    {
        uint trail, lead;

        pointer -= 2;
        if (pointer < begin)
        {
            pointer = begin;
            return 0xFFFD;
        }

        trail = pointer [0] | (pointer [1] << 8);

        /* Outside the surrogate block: the unit is the character. */
        if (trail < surrogateHighStart || trail > surrogateLowEnd)
            return trail;

        /* A high surrogate cannot end a pair. */
        if (trail <= surrogateHighEnd)
            return 0xFFFD;

        /* trail is a low surrogate, so the unit before it must be a high one. */
        pointer -= 2;
        if (pointer < begin)
        {
            pointer = begin;
            return 0xFFFD;
        }

        lead = pointer [0] | (pointer [1] << 8);

        if (lead < surrogateHighStart || lead > surrogateHighEnd)
            return 0xFFFD;

        return halfBase + ((lead - surrogateHighStart) << halfShift) + (trail - surrogateLowStart);
    }
}

/** UTF-32 in the byte order of the host machine: every character is
  * one four-octet unit read or written directly.
  */
class Encoding_UTF32 : Encoding
{
    static this ()
    {
        utf32 = new Encoding_UTF32 ();
    }

    /** Every character is reported as taking four octets. */
    override int length (uint ch)
    {
        return 4;
    }

    /** Store ch as one four-octet unit and advance. */
    override void put (uint ch, inout ubyte *pointer)
    {
        volatile *(uint *) pointer = ch;
        pointer += 4;
    }

    /** Read the unit at pointer and advance.  With fewer than four octets
      * left the pointer is moved to end and 0xFFFD is returned.
      */
    override uint next (inout ubyte *pointer, ubyte *end)
    {
        uint unit;

        if (end - pointer < 4)
        {
            pointer = end;
            return 0xFFFD;
        }

        volatile unit = *(uint *) pointer;
        pointer += 4;
        return unit;
    }

    /** Step back one unit and read it.  Running past begin sets the
      * pointer to begin and returns 0xFFFD.
      */
    override uint prev (inout ubyte *pointer, ubyte *begin)
    {
        uint unit;

        pointer -= 4;
        if (pointer < begin)
        {
            pointer = begin;
            return 0xFFFD;
        }

        volatile unit = *(uint *) pointer;
        return unit;
    }

    /** Convert octet data to ASCII; forwards to the base class. */
    char [] toascii (ubyte [] data)
    {
        return super.toascii (data);
    }

    /** Convert an array of whole units to ASCII by viewing it as octets. */
    char [] toascii (uint [] data)
    {
        ubyte *raw = (ubyte *) data;

        return toascii (raw [0 .. data.length * 4]);
    }
}

/** UTF-32 stored most-significant octet first.  Encoding (length and
  * put) is not implemented.
  */
class Encoding_UTF32BE : Encoding
{
    static this ()
    {
        utf32be = new Encoding_UTF32BE ();
    }

    /** Read the unit at pointer and advance.  With fewer than four octets
      * left the pointer is moved to end and 0xFFFD is returned.
      */
    override uint next (inout ubyte *pointer, ubyte *end)
    {
        uint unit;

        if (end - pointer < 4)
        {
            pointer = end;
            return 0xFFFD;
        }

        unit = (pointer [0] << 24)
             | (pointer [1] << 16)
             | (pointer [2] << 8)
             | pointer [3];
        pointer += 4;
        return unit;
    }

    /** Step back one unit and read it.  Running past begin sets the
      * pointer to begin and returns 0xFFFD.
      */
    override uint prev (inout ubyte *pointer, ubyte *begin)
    {
        pointer -= 4;
        if (pointer < begin)
        {
            pointer = begin;
            return 0xFFFD;
        }

        return (pointer [0] << 24)
             | (pointer [1] << 16)
             | (pointer [2] << 8)
             | pointer [3];
    }

    /** Not implemented; always throws. */
    override int length (uint ch)
    {
        throw new Error ("Encoding_UTF32BE.length not implemented");
        return 0;
    }

    /** Not implemented; always throws. */
    override void put (uint ch, inout ubyte *pointer)
    {
        throw new Error ("Encoding_UTF32BE.put not implemented");
    }
}

/** UTF-32 stored least-significant octet first.  Encoding (length and
  * put) is not implemented.
  */
class Encoding_UTF32LE : Encoding
{
    static this ()
    {
        utf32le = new Encoding_UTF32LE ();
    }

    /** Read the unit at pointer and advance.  With fewer than four octets
      * left the pointer is moved to end and 0xFFFD is returned.
      */
    override uint next (inout ubyte *pointer, ubyte *end)
    {
        uint unit;

        if (end - pointer < 4)
        {
            pointer = end;
            return 0xFFFD;
        }

        unit = (pointer [3] << 24)
             | (pointer [2] << 16)
             | (pointer [1] << 8)
             | pointer [0];
        pointer += 4;
        return unit;
    }

    /** Step back one unit and read it.  Running past begin sets the
      * pointer to begin and returns 0xFFFD.
      */
    override uint prev (inout ubyte *pointer, ubyte *begin)
    {
        pointer -= 4;
        if (pointer < begin)
        {
            pointer = begin;
            return 0xFFFD;
        }

        return (pointer [3] << 24)
             | (pointer [2] << 16)
             | (pointer [1] << 8)
             | pointer [0];
    }

    /** Not implemented; always throws. */
    override int length (uint ch)
    {
        throw new Error ("Encoding_UTF32LE.length not implemented");
        return 0;
    }

    /** Not implemented; always throws. */
    override void put (uint ch, inout ubyte *pointer)
    {
        throw new Error ("Encoding_UTF32LE.put not implemented");
    }
}

/** UCS-2 in the byte order of the host machine: each character is one
  * two-octet unit, returned as read.  Encoding (length and put) is not
  * implemented.
  */
class Encoding_UCS2 : Encoding
{
    static this ()
    {
        ucs2 = new Encoding_UCS2 ();
    }

    /** Read the unit at pointer and advance.  With fewer than two octets
      * left the pointer is moved to end and 0xFFFD is returned.
      */
    override uint next (inout ubyte *pointer, ubyte *end)
    {
        uint unit;

        if (end - pointer < 2)
        {
            pointer = end;
            return 0xFFFD;
        }

        volatile unit = *(ushort *) pointer;
        pointer += 2;
        return unit;
    }

    /** Step back one unit and read it.  Running past begin sets the
      * pointer to begin and returns 0xFFFD.
      */
    override uint prev (inout ubyte *pointer, ubyte *begin)
    {
        uint unit;

        pointer -= 2;
        if (pointer < begin)
        {
            pointer = begin;
            return 0xFFFD;
        }

        volatile unit = *(ushort *) pointer;
        return unit;
    }

    /** Not implemented; always throws. */
    override int length (uint ch)
    {
        throw new Error ("Encoding_UCS2.length not implemented");
        return 0;
    }

    /** Not implemented; always throws. */
    override void put (uint ch, inout ubyte *pointer)
    {
        throw new Error ("Encoding_UCS2.put not implemented");
    }
}

/** UCS-2 stored most-significant octet first: each character is one
  * two-octet unit.  Encoding (length and put) is not implemented.
  */
class Encoding_UCS2BE : Encoding
{
    static this ()
    {
        ucs2be = new Encoding_UCS2BE ();
    }

    /** Read the unit at pointer and advance.  With fewer than two octets
      * left the pointer is moved to end and 0xFFFD is returned.
      */
    override uint next (inout ubyte *pointer, ubyte *end)
    {
        uint unit;

        if (end - pointer < 2)
        {
            pointer = end;
            return 0xFFFD;
        }

        unit = (pointer [0] << 8) | pointer [1];
        pointer += 2;
        return unit;
    }

    /** Step back one unit and read it.  Running past begin sets the
      * pointer to begin and returns 0xFFFD.
      */
    override uint prev (inout ubyte *pointer, ubyte *begin)
    {
        pointer -= 2;
        if (pointer < begin)
        {
            pointer = begin;
            return 0xFFFD;
        }

        return (pointer [0] << 8) | pointer [1];
    }

    /** Not implemented; always throws. */
    override int length (uint ch)
    {
        throw new Error ("Encoding_UCS2BE.length not implemented");
        return 0;
    }

    /** Not implemented; always throws. */
    override void put (uint ch, inout ubyte *pointer)
    {
        throw new Error ("Encoding_UCS2BE.put not implemented");
    }
}

/** UCS-2 with little-endian: each character is one two-octet unit,
  * least-significant octet first.  Encoding (length and put) is not
  * implemented.
  */
class Encoding_UCS2LE : Encoding
{
    static this ()
    {
        /* Register as the little-endian instance.  This used to assign
         * ucs2be, which left ucs2le null and could replace the real
         * big-endian decoder with this one.
         */
        ucs2le = new Encoding_UCS2LE ();
    }

    /** Not implemented; always throws. */
    override int length (uint ch)
    {
        throw new Error ("Encoding_UCS2LE.length not implemented");
        return 0;
    }

    /** Not implemented; always throws. */
    override void put (uint ch, inout ubyte *pointer)
    {
        throw new Error ("Encoding_UCS2LE.put not implemented");
    }

    /** Read the unit at pointer and advance.  With fewer than two octets
      * left the pointer is moved to end and 0xFFFD is returned.
      */
    override uint next (inout ubyte *pointer, ubyte *end)
    {
        if (end - pointer < 2) { pointer = end; return 0xFFFD; }
        uint ch;

        ch = pointer [0] | (pointer [1] << 8);
        pointer += 2;
        return ch;
    }

    /** Step back one unit and read it.  Running past begin sets the
      * pointer to begin and returns 0xFFFD.
      */
    override uint prev (inout ubyte *pointer, ubyte *begin)
    {
        pointer -= 2;
        if (pointer < begin) { pointer = begin; return 0xFFFD; }
        return pointer [0] | (pointer [1] << 8);
    }
}

/** A string object, in some arbitrary encoding.  The character count is
  * stored next to the raw octets, so positions are given in characters
  * and found by decoding from the start.
  */
struct String
{
    ubyte [] data; /**< Source data. */
    Encoding encoding; /**< Encoding for the string. */
    uint length; /**< Length of the string in characters. */

    /** Convert the string to another encoding.  If dest is already this
      * string's encoding, a copy of this struct is returned unchanged.
      */
    String convert (Encoding dest)
    {
        if (dest === encoding)
            return *this;
        return AString (encoding.convert (data, dest), dest);
    }

    /** Convert the string to the encoding with the given name.
      * Throws an Error if no encoding of that name is found.
      */
    String convert (char [] name)
    {
        Encoding dest = Encoding.find (name);

        /* Report an unknown name here instead of handing a null encoding on. */
        if (dest === null)
            throw new Error ("Unknown encoding: " ~ name);
        return convert (dest);
    }

    /** Convert the string to ASCII. */
    char [] ascii ()
    {
        return (char []) convert (Encoding.ascii).data;
    }

    /** Get a section of the string; out-of-bounds values are asserted.
      * start and end are character indices, with end exclusive, and
      * start must not exceed end.  The result refers to this string's
      * data; nothing is copied.
      */
    String slice (int start, int end)
    {
        assert (start >= 0);
        assert (start <= length);
        assert (end >= 0);
        assert (end <= length);
        /* A reversed range would wrap the unsigned length of the result. */
        assert (start <= end);

        ubyte *p = data, q, e = p + data.length;

        /* Decode up to the first wanted character... */
        for (int c; c < start; c ++)
            encoding.next (p, e);
        q = p;
        /* ...then on to one past the last wanted character. */
        for (int c = start; c < end; c ++)
            encoding.next (q, e);

        return AString (p [0 .. (int) (q - p)], encoding, end - start);
    }

    /** Get a pointer in the string; out-of-bounds values are asserted.
      * index is a character index; the result points at that character's
      * first octet, or at the end of the data when index equals length.
      */
    ubyte *pointer (int index)
    {
        assert (index >= 0);
        assert (index <= length);

        ubyte *p = data, e = p + data.length;

        for (int c; c < index; c ++)
            encoding.next (p, e);

        return p;
    }
}

/** Create a string from raw data, counting its characters by decoding
  * the data from start to finish with the given encoding.
  */
String AString (ubyte [] data, Encoding encoding)
{
    ubyte *cursor = data;
    ubyte *stop = cursor + data.length;
    uint count;

    /* Each call to next consumes one character's worth of data. */
    for (count = 0; cursor < stop; count ++)
        encoding.next (cursor, stop);

    return AString (data, encoding, count);
}

/** Create a string with all fields.  The values are stored as given;
  * length is not checked against data.
  */
String AString (ubyte [] data, Encoding encoding, uint length)
{
    String string;

    /* Assign to the local struct.  These lines used to read
     * "std.string.data" and "std.string.encoding", which name the
     * library module and not the variable being built.
     */
    string.data = data;
    string.encoding = encoding;
    string.length = length;
    return string;
}
